*! version 5.0
* 13 August 2018
* NIDS
* Master Income do file for Nids Wave 2

* THIS IS THE 3RD INCOME DO FILE - PREPARING VARIABLES FOR IMPUTATION: 3 OF 7
* THIS DO FILE PREPARES THE RELEVANT INCOME VARIABLES FOR IMPUTATION

*=====================================================================================================================================
* GLOBALS FOR DATA FILES, DO FILES AND VERSION SUFFIXES

* DEFINED IN "Master Income do file (1 of 7)" 

version 12.0

*=====================================================================================================================================

* LOAD THE DATASET WRITTEN BY THE DO FILE "Income - Merging datasets to create income variables (2 of 7)"

**************
*PREP DO FILE*
**************

* Do file 3 of 7
* Gets the relevant income variables ready for imputation

use "$DataOUT\data.dta", clear

*****************
*Response status*
*****************

* response is 1 when outcome equals 1 and 0 otherwise (proxies included)
* NOTE(review): the value label "dummy" is not defined in this file - presumably defined upstream
gen response=(outcome==1)
lab var response "Did individual respond or not (incl. proxies)"
lab val response dummy

* responseoutcome starts as a copy of response and is overwritten with 2 for proxy records
gen responseoutcome=response
replace responseoutcome=2 if proxy==1
lab def responseoutcome 0 "Non-response" 1 "Adult" 2 "Proxy"
lab val responseoutcome responseoutcome

* Month of interview counted from January 2010 (2011 months are shifted by 12).
* Anything that cannot be placed in 2010 or 2011 ends up as 25.
gen intmonth=cond(intrv_y==2010, intrv_m, cond(intrv_y==2011, intrv_m+12, .))
replace intmonth=25 if intmonth==.

* Interview-year dummies; interview years coded 2008 or -3 are counted with 2010
gen int2010=inlist(intrv_y, 2010, 2008, -3)
gen int2011=(intrv_y==2011)

**************
*Demographics*
**************

********
*Gender*
********
* male: 1 for best_gen==1, 0 for best_gen==2; gaps are filled from roster_male.
* male_d marks records where a gender value is available after the fill.
recode best_gen (1=1 "Male") (2=0 "Female"), gen(male)
replace male=roster_male if male==.
gen male_d=(male!=.)
lab var male "Male dummy"

******
*Race*
******

rename best_race race
recode race (min/0=.)
* race_d is taken BEFORE the fill-in below, so it records whether race was observed
gen race_d=(race!=.)
* Missing race is set to 1 (African); race_d is the control for this fill-in
replace race=1 if race==.
recode race (1=1 "African") (2/5=0 "Non-African"), ///
	gen(african)
recode race (2=1 "Coloured") (3/5=0 "Non-Coloured") (1=0 "Non-Coloured") , ///
	gen(coloured)
recode race (3=1 "Asian_Indian") (1/2=0 "Non-asian_indian") (4/5=0 "Non-asian_indian"), ///
	gen(asian_indian)
recode race (4=1 "White") (1/3=0 "Non-White") (5=0 "Non-White"), ///
	gen(white)
recode race (5=1 "Other") (1/4=0 "Non-Other"), ///
	gen(other)

*****
*Age*
*****

* Missing ages are overwritten with 0; age_d is 1 wherever the age is 0 afterwards
replace w2_best_age_yrs=0 if w2_best_age_yrs==.
gen age_d=(w2_best_age_yrs==0)

* Polynomial age terms
gen agesq=w2_best_age_yrs^2
gen agecu=w2_best_age_yrs^3
lab var agesq "Age squared at interview"
lab var agecu "Age cubed at interview"

* Retirement age: 65 and over for men, 60 and over for women
gen retirement=(w2_best_age_yrs>=65 & male==1) | (w2_best_age_yrs>=60 & male==0)

***********
*Schooling*
***********

* Years of schooling from best_edu: codes 1-12 map to grades, 13/14/15 map to 10/11/12,
* 16-24 map to 12, 25 and -9 to 0 map to 0, codes 26 and above become missing
recode best_edu (-9/0=0) (1=1 "Grade 1") (2=2 "Grade 2") (3=3 "Grade 3") (4=4 "Grade 4") (5=5 "Grade 5") ///
(6=6 "Grade 6") (7=7 "Grade 7") (8=8 "Grade 8") (9=9 "Grade 9") (10=10 "Grade 10") (11=11 "Grade 11") ///
(12=12 "Grade 12") (13=10) (14=11) (15=12) (25=0 "No schooling") (16/24=12) (26/max=.), gen(schooling)
gen schooling_d=(schooling!=.)
gen schoolingsq=schooling^2
lab var schooling "Schooling"

* Post-school qualification code: every best_edu value from -9 to 15, and code 25, collapses to 0;
* all other codes are carried over unchanged
recode best_edu (-9/15 25=0), gen(postschool)
replace postschool=0 if best_edu>=-9 & best_edu<0 & schooling<12
gen postschool_d=(postschool!=.)

* One dummy per postschool category; the first dummy keeps its generated name best_edu1.
* NOTE(review): the renames assume tabulate yields at least ten categories in this order - confirm against the data
tab postschool, gen(best_edu)
local k=2
foreach newname in cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad othered {
	rename best_edu`k' `newname'
	local ++k
}

* Years of post-school education implied by the qualification code
recode postschool (16/17=1) (18/19=1) (20=3) (21=4) (22=4) (23=6) (24=0), gen(postschoolyears)

* Potential experience: age less 6, less schooling years, less post-school years
gen experience=w2_best_age_yrs-6-schooling-postschoolyears

* Union membership indicator (1 when em1tru equals 1)
gen tradeunion=(em1tru==1)
lab var tradeunion "Trade union membership"

**********************************************************************************
*Biochildren variable (number of biological children under 15 still in household)*
**********************************************************************************

* Running count over up to 16 birth-history slots
gen biochildren=0
lab var biochildren "Number of biological children 15 or younger still living with this mother"
forvalues k=1/16 {
	* birth year may be stored as a string; conversion is attempted and failures are ignored
	capture destring bhdob_y`k', replace
	gen biobirth`k'year=bhdob_y`k'
	* discard birth years after 2011 or before 1900
	replace biobirth`k'year=. if (biobirth`k'year>2011 & biobirth`k'year!=.) | biobirth`k'year<1900
	* count the child when born in 1997 or later and bhlive for this slot equals 1
	replace biochildren=biochildren+1 if biobirth`k'year>=1997 & biobirth`k'year!=. & bhlive`k'==1
}

* Blank the count for men with a zero count and for all proxy records
replace biochildren=. if (biochildren==0 & male==1) | proxy==1

* Household total of the individual counts
egen hh_biochildren=sum(biochildren), by(w2_hhid)

****************
*"Other" income*
****************

* Receipt flag (blank unless response equals 1), amount with codes -9 to 0 set to missing,
* and a dummy for a non-missing amount
gen othe_rec=(inco==1)
replace othe_rec=. if response!=1
recode inco_v (-9/0=.), gen(othe)
gen othe_d=(othe!=.)
lab var othe "Income from other sources"
lab var othe_d "Respondent has non-missing other income data"
lab val othe_d dummy

*********************
*Main wage variables*
*********************

* Only the net wage is prepared for imputation

* Employment flags for the main job (own report or proxy report) and the second job;
* respondents without the flag are set to 0, non-respondents stay missing
gen working=1 if em1==1 | prox_emp==1
replace working=0 if working!=1 & response==1
gen working2=1 if em2==1
replace working2=0 if working2!=1 & response==1

* Main job
* Net point estimate: codes -9 to 0 and amounts of 500000 or more are treated as missing
recode em1pay (-9/0=.) (500000/max=.), gen(fwag_p)

* Net pay from the respondent's bracket answers.
* The statements run in sequence and later ones overwrite earlier ones, so their order matters.
gen fwag_ib=.
replace fwag_ib=350 if em1inc_brac4==3
replace fwag_ib=700 if em1inc_brac4==2
replace fwag_ib=850 if em1inc_brac4==1 & em1inc_brac2==3
replace fwag_ib=1000 if em1inc_brac2==2
replace fwag_ib=1400 if em1inc_brac2==1 & em1inc_brac1==3
replace fwag_ib=1800 if em1inc_brac1==2
replace fwag_ib=2900 if em1inc_brac1==1 & em1inc_brac3==3
replace fwag_ib=4000 if em1inc_brac3==2
replace fwag_ib=6000 if em1inc_brac3==1 & em1inc_brac5==3
replace fwag_ib=8000 if em1inc_brac5==2
replace fwag_ib=16000 if em1inc_brac5==1

* Same value ladder from the proxy bracket answers
gen fwag_pib=.
replace fwag_pib=350 if prox_em1inc_s4==3
replace fwag_pib=700 if prox_em1inc_s4==2
replace fwag_pib=850 if prox_em1inc_s4==1 & prox_em1inc_s2==3
replace fwag_pib=1000 if prox_em1inc_s2==2
replace fwag_pib=1400 if prox_em1inc_s2==1 & prox_em1inc_s1==3
replace fwag_pib=1800 if prox_em1inc_s1==2
replace fwag_pib=2900 if prox_em1inc_s1==1 & prox_em1inc_s3==3
replace fwag_pib=4000 if prox_em1inc_s3==2
replace fwag_pib=6000 if prox_em1inc_s3==1 & prox_em1inc_s5==3
replace fwag_pib=8000 if prox_em1inc_s5==2
replace fwag_pib=16000 if prox_em1inc_s5==1

* Keep proxy brackets only where prox_emp equals 1, then use them to fill gaps in the
* respondent brackets for proxy records
replace fwag_pib=. if prox_emp!=1
gen fwag_pib_d=(fwag_pib!=.)
lab val fwag_pib_d dummy
lab var fwag_pib_d "Dummy to indicate if net wages were sourced from proxy income brackets"
replace fwag_ib=fwag_pib if proxy==1 & fwag_pib!=. & fwag_ib==.
gen fwag_ib_d=(fwag_ib!=.)
lab val fwag_ib_d dummy
lab var fwag_ib_d "Dummy to indicate if net wages were sourced from income brackets"

* Second job: point estimate, then bracket values (order of statements matters, as above)
recode em2pay (-9/0=.), gen(fwag_p2)
lab var fwag_p2 "Net pay from second job"
gen fwag_p_ib2=.
replace fwag_p_ib2=100 if em2inc_brac4==3
replace fwag_p_ib2=200 if em2inc_brac4==2
replace fwag_p_ib2=300 if em2inc_brac4==1 & em2inc_brac2==3
replace fwag_p_ib2=400 if em2inc_brac2==2
replace fwag_p_ib2=550 if em2inc_brac2==1 & em2inc_brac1==3
replace fwag_p_ib2=700 if em2inc_brac1==2
replace fwag_p_ib2=1200 if em2inc_brac1==1 & em2inc_brac3==3
replace fwag_p_ib2=1700 if em2inc_brac3==2
replace fwag_p_ib2=3350 if em2inc_brac3==1 & em2inc_brac5==3
replace fwag_p_ib2=5000 if em2inc_brac5==2
replace fwag_p_ib2=10000 if em2inc_brac5==1
* Point estimate where present, otherwise the bracket value
gen fwag2=cond(fwag_p2==., fwag_p_ib2, fwag_p2)
lab var fwag2 "Net pay from second job"

* Combined net wage: main-job point estimate, else main-job bracket value, plus second-job pay.
* rowtotal returns 0 when both parts are missing, so zeros are turned back into missing.
gen fwag=fwag_p
lab var fwag "Monthly take home pay from main job including brackets"
replace fwag=fwag_ib if fwag_ib!=. & fwag==.
egen fwag_sum_tmp=rowtotal(fwag fwag2)
replace fwag=fwag_sum_tmp
drop fwag_sum_tmp
replace fwag=. if fwag==0
lab var fwag "Take-home pay including brackets"

* Receipt flag (either job) and data-availability dummy
gen fwag_rec=(working==1 | working2==1)
replace fwag_rec=. if response==0
gen fwag_d=(fwag!=.)
lab var fwag_d "Respondent has non-missing net wage data"
lab val fwag_d dummy

***************************************
*Number of hours worked per week/month*
***************************************

* Weekly hours for the main and second job, codes -9 to 0 set to missing
recode em1hrs (-9/0=.), gen(weeklyhours)
recode em2hrs  (-9/0=.), gen(weeklyhours2)
* Sum across both jobs (missing only when both are missing).
* weeklyhours_temp is not dropped here and stays in the dataset.
egen weeklyhours_temp=rowtotal(weeklyhours weeklyhours2), missing
replace weeklyhours=weeklyhours_temp
* Totals above 126 hours a week, or equal to 0, are discarded
replace weeklyhours=. if weeklyhours>126 | weeklyhours==0
* Weekly hours scaled by 22/5 to a monthly figure
gen monthlyhours=weeklyhours*(22/5)
quietly summarize monthlyhours, detail
return list
* Workers without hours data are assigned the median, r(p50) from the summarize above.
* imputemonthlyhours is 1 for those workers, 0 for other workers, missing for non-workers.
gen imputemonthlyhours=(monthlyhours==.) if working==1
replace monthlyhours=r(p50) if monthlyhours==. & working==1

**************
*Bonus income*
**************

* Each item below follows one pattern: receipt flag (blanked for non-respondents),
* amount with codes -9 to 0 set to missing, and a dummy for a non-missing amount.

* 13th cheque
gen cheq_rec=(em1cheqlm==1)
replace cheq_rec=. if response==0
recode em1cheqlm_a (-9/0=.), gen(cheq)
gen cheq_d=(cheq!=.)
lab var cheq "Monthly income from cheq_pa from main job"
lab var cheq_d "Respondent has non-missing 13th cheque data"
lab val cheq_d dummy

* Profit share
gen prof_rec=(em1prflm==1)
replace prof_rec=. if response==0
recode em1prflm_a (-9/0=.), gen(prof)
gen prof_d=(prof!=.)
lab var prof "Monthly income from prof_pa from main job"
lab var prof_d "Respondent has non-missing profit share data"
lab val prof_d dummy

* Other bonuses
gen bonu_rec=(em1bonlm==1)
replace bonu_rec=. if response==0
recode em1bonlm_a (-9/0=.), gen(bonu)
gen bonu_d=(bonu!=.)
lab var bonu "Monthly income from other bonuses from main job"
lab var bonu_d "Respondent has non-missing other bonus data"
lab val bonu_d dummy

* Piece-rate income
gen extr_rec=(em1pcrt==1)
replace extr_rec=. if response==0
recode em1pcrt_a (-9/0=.) , gen(extr)
gen extr_d=(extr!=.)
lab var extr "Monthly income on a piece rate basis"
lab var extr_d "Respondent has non-missing piece-rate data"
lab val extr_d dummy

*************************
*Casual labour variables*
*************************

*Casual labour dummy (blank for non-respondents)
gen cwag_rec=emc==1
replace cwag_rec=. if response==0

*Casual work wages: point estimate, codes -9 to 0 set to missing
recode emcinc (-9/0=.), gen(cwag_p)
label variable cwag_p "Income from casual job"

*Income from casual work in brackets.
*The statements run in sequence and later ones overwrite earlier ones, so their order matters.
gen cwag_p_ib=.
replace cwag_p_ib=100 if emcinc_brac4==3
replace cwag_p_ib=200 if emcinc_brac4==2
replace cwag_p_ib=275 if emcinc_brac4==1 & emcinc_brac2==3
replace cwag_p_ib=350 if emcinc_brac2==2
replace cwag_p_ib=475 if emcinc_brac2==1 & emcinc_brac1==3
replace cwag_p_ib=600 if emcinc_brac1==2
replace cwag_p_ib=800 if emcinc_brac1==1 & emcinc_brac3==3
replace cwag_p_ib=1000 if emcinc_brac3==2
replace cwag_p_ib=1500 if emcinc_brac3==1 & emcinc_brac5==3
replace cwag_p_ib=2000 if emcinc_brac5==2
replace cwag_p_ib=4000 if emcinc_brac5==1

*Income from casual work: point estimate where present, otherwise the bracket value
gen cwag=cwag_p
label variable cwag "Monthly net pay from casual work including brackets"
replace cwag=cwag_p_ib if cwag==. & cwag_p_ib!=.
gen cwag_d=cwag!=.
label variable cwag_d "Respondent has non-missing casual pay data including brackets"
label values cwag_d dummy

*Hours of casual work: codes -9 to 0 and values above 550 are treated as missing
recode emchrs (-9/0=.), gen(cmonthlyhours)
replace cmonthlyhours=. if cmonthlyhours>550 & cmonthlyhours!=.
quietly sum cmonthlyhours, detail
return list
*Assuming that all of those without hours data work the median of hours.
*cimputemonthlyhours is 1 when the median was imputed and 0 when hours were reported.
*FIX: the second statement previously assigned 1 as well, which made the flag equal to 1
*for every casual worker and therefore useless as an imputation indicator.
gen cimputemonthlyhours=1 if cmonthlyhours==. & cwag_rec==1
replace cimputemonthlyhours=0 if cimputemonthlyhours!=1 & cwag_rec==1
*r(p50) is the median stored by the summarize command above
replace cmonthlyhours=r(p50) if cmonthlyhours==. & cwag_rec==1

*****************
*Self-employment*
*****************

*Self-employed indicator: own report (ems), blank for non-respondents,
*and set to 1 when the proxy report gives prox_emp equal to 2
gen sworking=ems==1
replace sworking=. if response==0
replace sworking=0 if response==1 & sworking==.
replace sworking=1 if prox_emp==2

*Earnings from self-employment: point estimate, codes -9 to 0 set to missing
recode emsincmn (-9/0=.), gen(swag_p)
label variable swag_p "Earnings from self-employment"
gen swag_p_d=swag_p!=.
label variable swag_p_d "Respondent has non-missing self-employment data"
label values swag_p_d dummy

*Earnings from self-employment in brackets.
*The statements run in sequence and later ones overwrite earlier ones, so their order matters.
gen swag_p_ib=.
replace swag_p_ib=50 if emsinc_brac4==3
replace swag_p_ib=100 if emsinc_brac4==2
replace swag_p_ib=150 if emsinc_brac4==1 & emsinc_brac2==3
replace swag_p_ib=200 if emsinc_brac2==2
replace swag_p_ib=450 if emsinc_brac2==1 & emsinc_brac1==3
replace swag_p_ib=700 if emsinc_brac1==2
replace swag_p_ib=1450 if emsinc_brac1==1 & emsinc_brac3==3
replace swag_p_ib=2200 if emsinc_brac3==2
replace swag_p_ib=4850 if emsinc_brac3==1 & emsinc_brac5==3
replace swag_p_ib=7500 if emsinc_brac5==2
replace swag_p_ib=15000 if emsinc_brac5==1

*For self-employed proxies: bracket values from the proxy bracket answers
gen swag_p_pib=.
replace swag_p_pib=350 if prox_em1inc_s4==3
replace swag_p_pib=700 if prox_em1inc_s4==2
replace swag_p_pib=850 if prox_em1inc_s4==1 & prox_em1inc_s2==3
replace swag_p_pib=1000 if prox_em1inc_s2==2
replace swag_p_pib=1400 if prox_em1inc_s2==1 & prox_em1inc_s1==3
replace swag_p_pib=1800 if prox_em1inc_s1==2
replace swag_p_pib=2900 if prox_em1inc_s1==1 & prox_em1inc_s3==3
replace swag_p_pib=4000 if prox_em1inc_s3==2
replace swag_p_pib=6000 if prox_em1inc_s3==1 & prox_em1inc_s5==3
replace swag_p_pib=8000 if prox_em1inc_s5==2
replace swag_p_pib=16000 if prox_em1inc_s5==1

*Proxy brackets apply only where prox_emp equals 2; they fill gaps in the own-report brackets
replace swag_p_pib=. if prox_emp!=2
replace swag_p_ib=swag_p_pib if swag_p_pib!=. & swag_p_ib==.

*Income from self-employment: point estimate where present, otherwise the bracket value;
*zeros and anyone not flagged as self-employed are set to missing
gen swag=swag_p
label variable swag "Monthly income from self-employment including brackets"
replace swag=swag_p_ib if swag==. & swag_p_ib!=.
replace swag=. if swag==0
replace swag=. if sworking!=1

gen swag_rec=sworking
gen swag_d=swag!=.
label variable swag_d "Respondent has non-missing self employment earnings data including brackets"
label values swag_d dummy

*Self-employment hours: weekly hours scaled by 22/5, monthly values above 550 discarded
recode emshrs (-9/0=.), gen(sweeklyhours)
gen smonthlyhours=sweeklyhours*(22/5)
replace smonthlyhours=. if smonthlyhours>550 & smonthlyhours!=.
quietly sum smonthlyhours, detail
return list
*Assuming that all of those without hours data work the median of hours.
*simputemonthlyhours is 1 when the median was imputed and 0 when hours were reported.
*FIX: the second statement previously assigned 1 as well, which made the flag equal to 1
*for every self-employed person and therefore useless as an imputation indicator.
gen simputemonthlyhours=1 if smonthlyhours==. & sworking==1
replace simputemonthlyhours=0 if simputemonthlyhours!=1 & sworking==1
*r(p50) is the median stored by the summarize command above
replace smonthlyhours=r(p50) if smonthlyhours==. & sworking==1

***************************************
*Income from helping friend's business*
***************************************

*Helping a friend: 1 when emhearn equals 1, 0 for other respondents, blank for non-respondents
gen help_rec=1 if emhearn==1
replace help_rec=0 if help_rec==. & response==1
replace help_rec=. if response==0

*Income from helping friend: codes -9 to 0 set to missing
recode emhearn_v (-9/0=.), gen(help)
label variable help "Monthly income from helping friends with their business"
replace help=. if help==0

gen help_d=help!=.
label variable help_d "Respondent has non-missing help-friend data"
label values help_d dummy

*Hours of work helping friends with business: codes -9 to -3 and values of 140 or more
*are treated as missing; weekly hours are scaled by 22/5 to a monthly figure
recode emhhrs (-9/-3=.) (140/max=.), gen(hf_weeklyhours)
gen hf_monthlyhours=hf_weeklyhours*(22/5)
quietly sum hf_monthlyhours, detail
return list
*Assuming that all of those without hours data work the median of hours.
*imp_hf_monthlyhours is 1 when the median was imputed and 0 when hours were reported.
*FIX: the second statement previously assigned 1 as well, which made the flag equal to 1
*for every helper and therefore useless as an imputation indicator.
gen imp_hf_monthlyhours=1 if hf_monthlyhours==. & help_rec==1
replace imp_hf_monthlyhours=0 if imp_hf_monthlyhours!=1 & help_rec==1
*r(p50) is the median stored by the summarize command above
replace hf_monthlyhours=r(p50) if hf_monthlyhours==. & help_rec==1

*Hourly earnings from helping friend, and its natural log
gen hr_help=help/hf_monthlyhours
gen lnhr_help=ln(hr_help)


***********************
*State Old Age Pension*
***********************

* SOAP amounts:
*	- before 1 April 2010: 1010
*	- 1 April 2010 to 1 April 2011: 1080
*	- after 1 April 2011: 1140
* Reported state pensions above R1180 per month are moved to private pensions.

* Receipt flag, blank for non-respondents
gen spen_rec=(incgovpen==1)
replace spen_rec=. if response==0

* Amount, codes -9 to 0 set to missing
recode incgovpen_v (-9/0=.), gen(spen)
lab var spen "Monthly income from state old age pension"

* A recorded value of 10080 is corrected to 1080
replace spen=1080 if spen==10080

* Amounts above 1180 (a small tolerance over the official amounts) are parked in temp_ppen,
* the state-pension receipt flag is switched off and the state-pension amount is blanked
gen temp_ppen=spen if spen>1180 & spen!=.
replace spen_rec=0 if temp_ppen!=.
replace spen=. if temp_ppen!=.

gen spen_d=(spen!=.)
lab var spen_d "Respondent has non-missing state pension data"
lab val spen_d dummy

*****************************
*Private or foreign pensions*
*****************************

* Receipt flag: private pension, retirement annuity, or a reallocated state pension amount;
* other respondents are set to 0
gen ppen_rec=1 if incppen==1 | incret==1 | temp_ppen!=.
replace ppen_rec=0 if ppen_rec!=1 & response==1

* Amount: sum of the two reported sources and the reallocated amount
* (missing only when all three are missing)
recode incppen_v (-9/0=.), gen(temp1)
recode incret_v (-9/0=.), gen(temp2)
egen ppen=rowtotal(temp1 temp2 temp_ppen), missing
lab var ppen "Monthly income from private/foriegn penion pension and retirement annuities"
* removes every variable whose name starts with temp
drop temp*

gen ppen_d=(ppen!=.)
lab var ppen_d "Respondent has non-missing private retirement funding income data"
lab val ppen_d dummy

*****
*UIF*
*****

* Receipt flag (blank for non-respondents), amount with codes -9 to 0 set to missing;
* amounts above 20000 are discarded
gen uif_rec=(incuif==1)
replace uif_rec=. if response==0
recode incuif_v (-9/0=.), gen(uif)
lab var uif "Monthly income from UIF payments"
replace uif=. if uif>20000

gen uif_d=(uif!=.)
lab var uif_d "Respondent has non-missing uif income data"
lab val uif_d dummy

************************
*Workmen's compensation*
************************

* Receipt flag, amount and data-availability dummy
gen comp_rec=(incwc==1)
replace comp_rec=. if response==0
recode incwc_v (-9/0=.), gen(comp)
lab var comp "Monthly income from workmen's compensation payments"

gen comp_d=(comp!=.)
lab var comp_d "Respondent has non-missing workmen's compensation income data"
lab val comp_d dummy

******************
*Disability grant*
******************

* Receipt flag, amount and data-availability dummy
gen dis_rec=(incdis==1)
replace dis_rec=. if response==0
recode incdis_v (-9/0=.), gen(dis)
lab var dis "Monthly income from disability grant payments"

gen dis_d=(dis!=.)
lab var dis_d "Respondent has non-missing disability grant data"
lab val dis_d dummy

*********************
*Child support grant*
*********************

* Receipt flag, blank for non-respondents
gen chld_rec=(incchld==1)
replace chld_rec=. if response==0

* Amount (codes -9 to 0 set to missing), kept only for recipients
recode incchld_v (-9/0=.), gen(chld)
lab var chld "Monthly income from child support grant"
replace chld=. if chld_rec!=1

gen chld_d=(chld!=.)
lab var chld_d "Respondent has non-missing child support grant data"
lab val chld_d dummy

* Number of children living with adult females.
* This differs from biochildren (biological children under 15 still living with the woman):
* it comes from different data and counts children of any age.
recode bhlive_n (-3/0=.), gen(biochild)
replace biochild=0 if  bhbrth==2 | bhlive==2
* Among grant recipients: flag a missing count (1) versus an available count (0),
* then set the missing count to 0
gen biochild_nodata=(biochild==.) if chld_rec==1
replace biochild=0 if biochild==. & chld_rec==1
* Cap the count at hhchildren where that is available
replace biochild=hhchildren if biochild>hhchildren & hhchildren!=. & biochild!=.

* Household total of biochild
egen hh_biochild=sum(biochild), by(w2_hhid)

*******************
*Foster care grant*
*******************

* Each item below follows one pattern: receipt flag (blanked for non-respondents),
* amount with codes -9 to 0 set to missing, and a dummy for a non-missing amount.

gen fost_rec=(incfos==1)
replace fost_rec=. if response==0
recode incfos_v (-9/0=.), gen(fost)
gen fost_d=(fost!=.)
lab var fost "Monthly income from foster care grant"
lab var fost_d "Respondent has non-missing foster care grant data"
lab val fost_d dummy

***********************
*Care dependency grant*
***********************

gen cdep_rec=(inccare==1)
replace cdep_rec=. if response==0
recode inccare_v (-9/0=.), gen(cdep)
gen cdep_d=(cdep!=.)
lab var cdep "Monthly income from care dependency grant"
lab var cdep_d "Respondent has non-missing care dependency grant data"
lab val cdep_d dummy

**************************
*Interest/dividend income*
**************************

gen indi_rec=(incint==1)
replace indi_rec=. if response==0
recode incint_v (-9/0=.), gen(indi)
gen indi_d=(indi!=.)
lab var indi "Monthly income from interest/dividends"
lab var indi_d "Respondent has non-missing interest/dividend income data"
lab val indi_d dummy

*************
*Inheritance*
*************

gen inhe_rec=(incinh==1)
replace inhe_rec=. if response==0
recode incinh_v (-9/0=.), gen(inhe)
gen inhe_d=(inhe!=.)
lab var inhe "Monthly income from inheritances"
lab var inhe_d "Respondent has non-missing inheritance income data"
lab val inhe_d dummy

***********************
*War veterans pension*
***********************

* Receipt flag, blank for non-respondents
gen war_rec=(incwar==1)
replace war_rec=. if response==0

* The war pension amount is folded into "other" income (othe):
* the sum is missing only when both components are missing
recode incwar_v (-9/0=.), gen(war_income)
egen othe_total_tmp=rowtotal(othe war_income), missing
replace othe=othe_total_tmp
drop othe_total_tmp
* Keep the "other" income flags in line with the updated amount
replace othe_rec=1 if othe!=. & othe_rec==0
replace othe_d=1 if othe!=.


***************
*Rental income*
***************

* Each item below follows one pattern: receipt flag (blanked for non-respondents),
* amount with codes -9 to 0 set to missing, and a dummy for a non-missing amount.

gen rnt_rec=(incrnt==1)
replace rnt_rec=. if response==0
recode incrnt_v (-9/0=.), gen(rnt)
gen rnt_d=(rnt!=.)
lab var rnt "Monthly income from rentals"
lab var rnt_d "Respondent has non-missing rental income data"
lab val rnt_d dummy

*********************************
*Retrenchment/retirement package*
*********************************

* Receipt of either a retrenchment or a retirement package
gen retr_rec=(incretr==1 | incretp==1)
replace retr_rec=. if response==0

* Amount: sum of the two sources (missing only when both are missing)
recode incretr_v (-9/0=.), gen(temp1)
recode incretp_v (-9/0=.), gen(temp2)
egen retr=rowtotal(temp1 temp2), missing
lab var retr "Monthly income from retrenchment package"
* removes every variable whose name starts with temp
drop temp*

gen retr_d=(retr!=.)
lab var retr_d "Respondent has non-missing retrenchment payment data"
lab val retr_d dummy

******************************
*Lobola/bride wealth payments*
******************************

gen brid_rec=(inclob==1)
replace brid_rec=. if response==0
recode inclob_v (-9/0=.), gen(brid)
gen brid_d=(brid!=.)
lab var brid "Monthly income from lobola/bride wealth payments"
lab var brid_d "Respondent has non-missing lobola/bride wealth payment data"
lab val brid_d dummy

*************
*Gift income*
*************

gen gift_rec=(incgif==1)
replace gift_rec=. if response==0
recode incgif_v (-9/0=.), gen(gift)
gen gift_d=(gift!=.)
lab var gift "Monthly income from gifts"
lab var gift_d "Respondent has non-missing gift income data"
lab val gift_d dummy

***************************
*Repayment of loans to you*
***************************

gen loan_rec=(incloan==1)
replace loan_rec=. if response==0
recode incloan_v (-9/0=.), gen(loan)
gen loan_d=(loan!=.)
lab var loan "Monthly 'income' from loan repayments"
lab var loan_d "Respondent has non-missing repayment of loans income data"
lab val loan_d dummy

*************************
*Sale of household goods*
*************************

gen sale_rec=(incsale==1)
replace sale_rec=. if response==0
recode incsale_v (-9/0=.), gen(sale)
gen sale_d=(sale!=.)
lab var sale "Monthly 'income' from sale of household goods"
lab var sale_d "Respondent has non-missing sale of household goods income data"
lab val sale_d dummy

*************
*Remittances*
*************

*Receive remittance income (blank for non-respondents)
gen remt_rec=cr==1
replace remt_rec=. if response==0

*Income from remittances

*Up to ten money remittances (crmnv1-crmnv10) and ten in-kind remittance values
*(crkndmnv1-crkndmnv10) are cleaned into scratch variables temp1-temp20.
*Not every slot has to exist in the data, so each one is checked first; the check uses
*the exact name so that a missing slot 1 cannot be matched by abbreviation to slot 10.
*Slots that exist are recoded without capture, so genuine recode errors are no longer hidden.
*A forvalues counter replaces the former dataset variable used as a loop counter, which
*silently skipped the loop on an empty dataset.

*Money remittances
forvalues i=1/10 {
	capture confirm numeric variable crmnv`i', exact
	if !_rc recode crmnv`i' (-9/0=.), gen(temp`i')
}

*Values of in-kind remittances
forvalues i=1/10 {
	local j=`i'+10
	capture confirm numeric variable crkndmnv`i', exact
	if !_rc recode crkndmnv`i' (-9/0=.), gen(temp`j')
}

*Aggregating remittances (missing only when every component is missing)
egen remt=rowtotal(temp*), m
label variable remt "Monthly income from all remittances"
*removes every variable whose name starts with temp
drop temp*

gen remt_d=remt!=.
label variable remt_d "Respondent has non-missing remittances received data"
label values remt_d dummy

*************
*AGRICULTURE*
*************

*Income from subsistence agriculture
gen plot_rec=empsll==1
*Also including income earned by working for others on subsistence agriculture.
*FIX: the condition used to read plot==0, but the variable plot is only created by the
*recode further down. The name could therefore only resolve through variable abbreviation
*to plot_rec (or fail outright when abbreviation is off or ambiguous); plot_rec is now
*named explicitly.
replace plot_rec=1 if empser==1 & plot_rec==0
replace plot_rec=. if response!=1
*Amount from selling produce: zero and negative values are set to missing
recode empsll_v (min/0=.), gen(plot)
lab var plot "Income from selling produce"
gen plot_d=plot!=.
lab var plot_d "Respondent has non-missing agri. sell data"
lab val plot_d dummy

*Income from consumption of own produce: receipt flag (blank unless response equals 1),
*amount with codes -9 to 0 set to missing, and a dummy for a non-missing amount
gen opro_rec=empconprod==1
replace opro_rec=. if response!=1
recode empconprod_v (-9/0=.), gen(opro)
label variable opro "Income from own production"
gen opro_d=opro!=.
label variable opro_d "Respondent has non-missing own production data"
label values opro_d dummy


************************************************************
*Household variables for household level income imputations*
************************************************************

*Household one-shot income: flag is 1 for households with w2_hh_outcome equal to 1,
*a reported amount of 0 is treated as missing
sort w2_hhid
gen hhq_incb_rec=1
replace hhq_incb_rec=. if w2_hh_outcome!=1
replace hhq_incb=. if hhq_incb==0

gen hhq_incb_d=hhq_incb!=.
label values hhq_incb_d dummy

*Household mode race: smallest mode first, largest mode as fallback;
*hhrace_d records availability before missing values are set to 0
egen hhrace=mode(race), by(w2_hhid) minmode
egen hhracetemp=mode(race), by(w2_hhid) maxmode
replace hhrace=hhracetemp if hhrace==.
drop hhracetemp
gen hhrace_d=hhrace!=.
replace hhrace=0 if hhrace==.

*Maximum household education: highest schooling in the household, replaced by the
*years implied by the highest post-school qualification code where there is one
egen hhedu=max(schooling), by(w2_hhid)
egen hhpostedu=max(postschool), by(w2_hhid)
recode hhpostedu (16/17=12) (18/19=13) (20/21=15) (22=16) (23=17) (24=.)
replace hhedu=hhpostedu if hhpostedu!=0 & hhpostedu!=.
gen hhedusq=hhedu^2

*Median household age, computed over members with an age from 0 up to (not including) 110
egen hhage=median(w2_best_age_yrs) if w2_best_age_yrs>=0 & w2_best_age_yrs<110, by(w2_hhid)
sort w2_hhid hhage

*FIX: hhage_d used to be generated after the missing values of hhage had already been
*replaced by 0, so it was 1 for every record. It is now taken before the zero fill,
*matching the pattern used for hhrace_d. The later second zero fill was dead code and is removed.
*NOTE(review): missing individual ages are recoded to 0 earlier in this file and pass the
*age filter above, so a median of 0 can also arise from members without age data - confirm
*whether hhage_d should additionally be 0 in that case.
gen hhage_d=hhage!=.
replace hhage=0 if hhage==.

*NOTE(review): the drop below deletes observations (members whose own hhage was filled with 0
*while another member of the household has a positive value) - confirm this is intended
*rather than copying the household value to them.
egen max_age = max(hhage), by(w2_hhid)
drop if hhage!= max_age & hhage==0
drop max_age

*Dummy variable for a trade union member present in household
egen hhtu=max(tradeunion), by(w2_hhid)

save "$DataOUT\prepdata.dta", replace

* end of do file 

*---------------------------------------------------------------------------------------------------------------------------------------------

